import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re

def extract_in(inout: str):
    """Return the integer that follows the first ``in`` in *inout*.

    The argument is converted with ``str()`` before searching, so non-string
    values are accepted. Returns ``None`` when no ``in<digits>`` token is found.
    """
    text = str(inout)
    found = re.search(r"in(\d+)", text)
    if found is None:
        return None
    digits = found.group(1)
    return int(digits)

def extract_and(subdir: str):
    """Return the integer that follows the first ``and`` in *subdir*.

    The argument is converted with ``str()`` before searching, so non-string
    values are accepted. Returns ``None`` when no ``and<digits>`` token is found.
    """
    text = str(subdir)
    found = re.search(r"and(\d+)", text)
    if found is None:
        return None
    digits = found.group(1)
    return int(digits)

def _group_bar_centers(tick, count, bar_width):
    """Return the x centers of ``count`` adjacent bars centered as a group on ``tick``."""
    # Bars are center-aligned, so the distance between the first and last bar
    # centers is (count - 1) * bar_width; shift left by half of that span.
    first = tick - (count - 1) * bar_width / 2
    return [first + j * bar_width for j in range(count)]


def visualize_summary(excel_file: str, name_met="ANO_metrics_vs_in.png",
                      name_ratio="ANO_unique_ratio.png", in_values=None) -> None:
    """Render two summary figures from an Excel workbook and save them as PNGs.

    Figure 1 (saved to *name_met*) is read from the ``global`` sheet and plots
    the ``global_unique_ratio``, ``max_repeat_ratio`` and ``avg_repeat``
    columns against the ``in`` value parsed from the ``inout`` column.

    Figure 2 (saved to *name_ratio*) is read from the ``subdirs`` sheet and
    draws, for every ``in`` value, one bar per ``and`` value (parsed from the
    ``subdir`` column) showing the ``unique_ratio`` column.

    Args:
        excel_file: Path of the workbook containing the ``global`` and
            ``subdirs`` sheets.
        name_met: Output path of the metrics line plot.
        name_ratio: Output path of the grouped bar plot.
        in_values: ``in`` values to show, in x-axis order. Each value gets an
            evenly spaced tick. Defaults to ``[5, 10, 20, 40, 80]``.
    """
    in_values = [5, 10, 20, 40, 80] if in_values is None else list(in_values)
    in_positions = np.arange(len(in_values))
    tick_labels = [str(v) for v in in_values]
    position_of = {value: idx for idx, value in enumerate(in_values)}

    # ======== Sheet2 ========
    df_global = pd.read_excel(excel_file, sheet_name="global")
    df_global["in"] = df_global["inout"].apply(extract_in)
    df_global = df_global.dropna(subset=["in"])
    # Explicit copy: a new column is assigned below, and assigning on a
    # filtered frame would otherwise raise SettingWithCopyWarning.
    df_global = df_global[df_global["in"].isin(in_values)].copy()
    df_global["xpos"] = df_global["in"].map(lambda v: position_of[v])
    # Sort once; the order is the same for every metric.
    df_global = df_global.sort_values("xpos")

    plt.figure(figsize=(8, 6))  # 8:6
    for metric in ["global_unique_ratio", "max_repeat_ratio", "avg_repeat"]:
        plt.plot(df_global["xpos"], df_global[metric], marker="o", linestyle="-",
                 label=metric, markersize=6)

    plt.xticks(in_positions, tick_labels, fontsize=12, fontweight='bold')
    plt.yticks(fontsize=12, fontweight='bold')
    plt.xlabel("in", fontsize=14, fontweight='bold')
    plt.ylabel("Value", fontsize=14, fontweight='bold')
    plt.title("Sheet2: Global metrics vs in", fontsize=14, fontweight='bold')
    plt.legend(fontsize=12, frameon=False)
    plt.grid(True, linestyle="--", alpha=0.6)
    plt.tight_layout()
    plt.savefig(name_met, dpi=300)
    plt.close()

    # ======== Sheet1 ========
    df_subdirs = pd.read_excel(excel_file, sheet_name="subdirs")
    df_subdirs["in"] = df_subdirs["inout"].apply(extract_in)
    df_subdirs["and"] = df_subdirs["subdir"].apply(extract_and)
    df_subdirs = df_subdirs.dropna(subset=["in", "and"]).copy()
    # Rows that failed to parse turn the columns into floats (NaN); after
    # dropping them, restore ints so bar labels read "8" rather than "8.0".
    df_subdirs["in"] = df_subdirs["in"].astype(int)
    df_subdirs["and"] = df_subdirs["and"].astype(int)
    df_subdirs = df_subdirs.sort_values(["in", "and"])

    plt.figure(figsize=(8, 6))  # 8:6
    bar_width = 0.12  # width of a single bar, in x-axis units
    and_ypos = 0.36  # y of the "and" labels; sits below the 0.4 lower y-limit

    for tick, in_val in enumerate(in_values):
        subdf = df_subdirs[df_subdirs["in"] == in_val].sort_values("and")
        if subdf.empty:
            # Nothing to draw for this "in" value.
            continue
        ands = subdf["and"].tolist()
        ys = subdf["unique_ratio"].tolist()
        colors = plt.cm.Blues(np.linspace(0.4, 0.9, len(ands)))
        # Center the whole group on its tick (bars are center-aligned).
        xs = _group_bar_centers(tick, len(ands), bar_width)
        plt.bar(xs, ys, width=bar_width, color=colors, edgecolor=None, align="center")

        for xv, av in zip(xs, ands):
            plt.text(xv, and_ypos, str(av), ha="center", va="top", fontsize=10,
                     fontweight='bold', rotation=45)

    plt.xticks(in_positions, tick_labels, fontsize=12, fontweight='bold')
    # Large labelpad leaves room between the axis and its label for the
    # rotated "and" labels drawn below the axis.
    plt.xlabel("in", fontsize=14, fontweight='bold', labelpad=30)
    plt.ylabel("unique_ratio", fontsize=14, fontweight='bold', labelpad=10)
    plt.yticks(fontsize=12, fontweight='bold')
    plt.title("Sheet1: unique_ratio by in and and-size", fontsize=14, fontweight='bold')
    plt.ylim(0.4, 1.0)
    plt.grid(axis="y", linestyle="--", alpha=0.6)
    # NOTE(review): tight_layout() recomputes the subplot margins, so the
    # bottom=0.2 set here may not survive it. Call order is kept unchanged to
    # preserve the existing layout -- confirm which margin is intended.
    plt.subplots_adjust(bottom=0.2)
    plt.tight_layout()
    plt.savefig(name_ratio, dpi=300)
    plt.close()

    print(f"Saved: {name_ratio},{name_met}")

if __name__ == "__main__":
    # Script entry point: render both figures from the default summary workbook.
    summary_workbook = "results/summary_ANO.xlsx"
    visualize_summary(summary_workbook)
